rm(list = ls())
here::i_am(file.path("code", "09_aa_cards.R"))
library(here)
source(here("code", "config.R"))

setFixest_dict(
  c(
    "vet_combined" = "Veteran",
    "order_num_serial_norm" = "Order number (scaled)",
    "birthyr_cards" = "Birth year",
    "bpl_cards" = "Birth state",
    "exemption" = "Exemption",
    "married_cards" = "Married",
    "farmer" = "Farmer",
    "laborer" = "Laborer",
    "farm_laborer" = "Farm laborer",
    "board_identifier" = "Draft board",
    "is_aa" = "Community leader"
  )
)

################################################################################
# Make analysis data

# NB: we can't use "analysis.parquet" because we want to include all cards,
# not just the ones we match to census

baseline_match_type <- "standard_match_0"

census <- read_parquet(here("data", "analysis", "census.parquet"))
cards <- read_parquet(here("data", "analysis", "cards.parquet"))

matches <- read_parquet(
  here("data", "analysis", "cards_census_matches.parquet"),
  col_select = c("card_id", "standard_match_0")
) |>
  rename(census_id = standard_match_0)

aa <- read_parquet(here("data", "analysis", "cards_aa_matches.parquet"))

est_df <- cards |>
  left_join(
    matches,
    by = "card_id"
  ) |>
  left_join(
    census,
    by = "census_id"
  ) |>
  left_join(aa |> mutate(is_aa = TRUE), by = "card_id") |>
  mutate(
    is_aa = replace_na(is_aa, FALSE),
    across(starts_with("occ_"), \(x) replace_na(x, FALSE)),
    order_num_serial_norm =  local_order_num_from_serial/registrants_second_report,
    vet_combined = as.integer(vet_cards | replace_na(vet_c1930, FALSE))
  ) |>
  drop_na(order_num_serial_norm, birthyr_cards, bpl_cards) |>
  filter(birthyr_cards >= 1880, birthyr_cards <= 1900)

################################################################################
# Baseline estimates

ols_all <- feols(
  is_aa ~ vet_combined | csw(board_identifier, birthyr_cards + bpl_cards,
                   exemption^married_cards, farmer + laborer + farm_laborer),
  data = est_df |> filter(!is.na(order_num_serial_norm)),
  vcov = "hetero"
)

ols_matched <- feols(
  is_aa ~ vet_combined | birthyr_cards + bpl_cards + board_identifier +
    exemption^married_cards + farmer + laborer + farm_laborer,
  data = est_df |> filter(!is.na(order_num_serial_norm), !is.na(census_id)),
  vcov = "hetero"
)

my_nonvet <- c(
  ols_all |>
    map_dbl(\(x) {
      rhs <- model.matrix(x)
      lhs <-  model.matrix(x, type = "lhs")
      mean(lhs[rhs == 0])
    }) |>
    as.numeric(),
  list(ols_matched) |>
    map_dbl(\(x) {
      rhs <- model.matrix(x)
      lhs <-  model.matrix(x, type = "lhs")
      mean(lhs[rhs == 0])
    }) |>
    as.numeric()
)

ols_coefs <- sprintf(
  "%.04f",
  c(ols_all |> map_dfr(broom::tidy) |> pull(estimate),
    ols_matched |> broom::tidy() |> pull(estimate))
)

ols_t <- sprintf(
  "%.02f",
  c(ols_all |> map_dfr(broom::tidy) |> pull(statistic),
    ols_matched |> broom::tidy() |> pull(statistic))
)

iv_all <- feols(
  is_aa ~ 1 | 
    csw(board_identifier, birthyr_cards + bpl_cards,
    exemption^married_cards, farmer + laborer + farm_laborer) |
    vet_combined ~ order_num_serial_norm,
  data = est_df,
  cluster = ~ serial_num
)

etable(
  iv_all, 
  signif.code = c("***" = 0.01, "**" = 0.05, "*" = 0.10),
  fitstat = ~ n + r2 + ivwald,
  fixef.group =  list(
    "-_Birth year and state" = c("Birth year", "Birth state"),
    "-_Exemption $\\times$ married" = c("Exemption-Married"),
    "-_Prewar occupation" = c("Farmer", "Laborer", "Farm laborer")
  ),
  extralines = list(
    "__Dep. var. mean (nonveterans)" = my_nonvet[1:4],
    "__OLS coefficient" = ols_coefs[1:4],
    "__OLS $t$-statistic" = ols_t[1:4]
  ),
  postprocess.tex = function(x) { 
    x <- gsub("Wald (1st stage), Veteran", "First stage $F$-statistic", x, fixed = TRUE)
    return(x)
  },
  tex = TRUE,
  replace = TRUE,
  file = file.path(tab_dir, "iv_aa.tex")
)

################################################################################
# Robustness to different definitions of occupation

est_df <- est_df |>
  mutate(
    # drop sport
    is_aa1 = occ_writer | occ_scholar | occ_religious | occ_professional |
      occ_military | occ_medical | occ_leader | occ_govt | occ_educator |
      occ_civilrights | occ_business | occ_art,
    # additionally drop art
    is_aa2 = occ_writer | occ_scholar | occ_religious | occ_professional |
      occ_military | occ_medical | occ_leader | occ_govt | occ_educator |
      occ_civilrights | occ_business,
    # additionally drop military
    is_aa3 = occ_writer | occ_scholar | occ_religious | occ_professional |
      occ_medical | occ_leader | occ_govt | occ_educator |
      occ_civilrights | occ_business,
    # civil rights occupations
    is_aa4 = occ_civilrights | occ_leader | occ_writer | occ_religious | 
      occ_educator | occ_professional
  )


ols_occs <- feols(
  c(is_aa, is_aa1, is_aa2, is_aa3, is_aa4) ~ vet_combined | board_identifier + birthyr_cards + bpl_cards +
    exemption^married_cards + farmer + laborer + farm_laborer,
  data = est_df |>
    filter(!is.na(order_num_serial_norm)),
  vcov = "hetero"
)

my_nonvet <- ols_occs |>
    map_dbl(\(x) {
      rhs <- model.matrix(x)
      lhs <-  model.matrix(x, type = "lhs")
      mean(lhs[rhs == 0])
    }) |>
    as.numeric()

ols_coefs <- sprintf(
  "%.04f",
  ols_occs |> map_dfr(broom::tidy) |> pull(estimate)
)

ols_t <- sprintf(
  "%.02f",
  ols_occs |> map_dfr(broom::tidy) |> pull(statistic)
)

iv_occs <- feols(
  c(is_aa, is_aa1, is_aa2, is_aa3, is_aa4) ~ 1 | board_identifier + birthyr_cards + bpl_cards +
    exemption^married_cards + farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = est_df |>
    filter(!is.na(order_num_serial_norm)),
  cluster = ~ serial_num
)

etable(
  iv_occs,
  signif.code = c("***" = 0.01, "**" = 0.05, "*" = 0.10),
  fitstat = ~ n + r2 + ivwald,
  dict = c(
    "is_aa" = "Community leader",
    "is_aa1" = "Community leader",
    "is_aa2" = "Community leader",
    "is_aa3" = "Community leader",
    "is_aa4" = "Community leader"
  ),
  fixef.group =  list(
    "-_Birth year and state" = c("Birth year", "Birth state"),
    "-_Exemption $\\times$ married" = c("Exemption-Married"),
    "-_Prewar occupation" = c("Farmer", "Laborer", "Farm laborer")
  ),
  extralines = list(
    "__Dep. var. mean (nonveterans)" = my_nonvet,
    "__OLS coefficient" = ols_coefs,
    "__OLS $t$-statistic" = ols_t
  ),
  headers = linebreak(c(
    "All",
    "Exclude\nsports",
    "Exclude\nsports, arts",
    "Exclude sports,\narts, military",
    "Civil rights\naffiliated"
  ), align="c"),
  postprocess.tex = function(x) { 
    x <- gsub("Wald (1st stage), Veteran", "First stage $F$-statistic", x, fixed = TRUE) 
    return(x)
  },
  depvar = FALSE,
  tex = TRUE,
  replace = TRUE,
  file = file.path(tab_dir, "iv_aa_robustness.tex")
)
